{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Code generation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We teach Adala how to generate a code to convert one json format to another.\n",
    "\n",
    "The following are the examples from Huggingface Inference API responses:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "\n",
    "df = pd.DataFrame([\n",
    "    {'payload': '{\"outputs\": [{\"entity_group\": \"ORG\", \"score\": 0.9994323253631592, \"word\": \"Apple Inc\", \"start\": 0, \"end\": 9}, {\"entity_group\": \"MISC\", \"score\": 0.997283935546875, \"word\": \"iPhone 14\", \"start\": 24, \"end\": 33}], \"inputs\": \"Apple Inc. released the iPhone 14 in September 2022, featuring satellite connectivity.\"}'},\n",
    "    {'payload': '{\"outputs\": [{\"entity_group\": \"MISC\", \"score\": 0.9428057670593262, \"word\": \"Ubuntu\", \"start\": 26, \"end\": 32}, {\"entity_group\": \"MISC\", \"score\": 0.962793231010437, \"word\": \"Ubuntu\", \"start\": 51, \"end\": 57}, {\"entity_group\": \"ORG\", \"score\": 0.998673677444458, \"word\": \"Canonical Ltd\", \"start\": 87, \"end\": 100}], \"inputs\": \"The latest version of the Ubuntu operating system, Ubuntu 22.04, was made available by Canonical Ltd. in April.\"}'},\n",
    "    {'payload': '{\"outputs\": [{\"entity_group\": \"ORG\", \"score\": 0.979661226272583, \"word\": \"Tesla\", \"start\": 0, \"end\": 5}, {\"entity_group\": \"ORG\", \"score\": 0.8453200459480286, \"word\": \"Cybertru\", \"start\": 12, \"end\": 20}, {\"entity_group\": \"MISC\", \"score\": 0.7452507019042969, \"word\": \"##ck\", \"start\": 20, \"end\": 22}, {\"entity_group\": \"PER\", \"score\": 0.9728273153305054, \"word\": \"El\", \"start\": 78, \"end\": 80}, {\"entity_group\": \"PER\", \"score\": 0.9739447236061096, \"word\": \"##on Musk\", \"start\": 80, \"end\": 87}], \"inputs\": \"Tesla\\'s new Cybertruck is set to hit the roads in late 2023, according to CEO Elon Musk.\"}'},\n",
    "    {'payload': '{\"outputs\": [{\"entity_group\": \"ORG\", \"score\": 0.9987253546714783, \"word\": \"Google\", \"start\": 0, \"end\": 6}, {\"entity_group\": \"ORG\", \"score\": 0.9994670748710632, \"word\": \"Alphabet Inc\", \"start\": 25, \"end\": 37}, {\"entity_group\": \"MISC\", \"score\": 0.9959796667098999, \"word\": \"Pixel 6\", \"start\": 91, \"end\": 98}], \"inputs\": \"Google\\'s parent company, Alphabet Inc., saw a rise in stock prices after the launch of the Pixel 6.\"}'},\n",
    "    {'payload': '{\"outputs\": [{\"entity_group\": \"ORG\", \"score\": 0.999211311340332, \"word\": \"Samsung Electronics\", \"start\": 0, \"end\": 19}, {\"entity_group\": \"ORG\", \"score\": 0.9967896342277527, \"word\": \"LG Display\", \"start\": 38, \"end\": 48}, {\"entity_group\": \"MISC\", \"score\": 0.47527530789375305, \"word\": \"O\", \"start\": 56, \"end\": 57}, {\"entity_group\": \"MISC\", \"score\": 0.5774009227752686, \"word\": \"##D\", \"start\": 59, \"end\": 60}], \"inputs\": \"Samsung Electronics is competing with LG Display in the OLED market.\"}'}\n",
    "])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The goal is to convert them into the Label Studio format.\n",
    "\n",
    "`SimpleCodeValidationEnvironment` automatically validates and feedback is exchanged with agents to improve based on detected errors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from adala.skills import AnalysisSkill, ParallelSkillSet, LinearSkillSet\n",
    "from adala.agents import Agent\n",
    "from adala.environments import StaticEnvironment, WebStaticEnvironment, SimpleCodeValidationEnvironment\n",
    "from adala.runtimes import OpenAIChatRuntime\n",
    "\n",
    "\n",
    "skillset = ParallelSkillSet(skills=[\n",
    "    AnalysisSkill(\n",
    "        name='code_generation',\n",
    "        input_template=\"Input JSON: {payload}\",\n",
    "        output_template=\"Code: {code}\",\n",
    "        instructions='''\n",
    "Format description: \n",
    "id - Identifier for the labeling task from the dataset.\n",
    "data - Data dict copied from the input data task format.\n",
    "project - Identifier for a specific project in Label Studio.\n",
    "predictions - Array containing the labeling results for the task.\n",
    "predictions.id - Identifier for the completed task.\n",
    "predictions.lead_time - Time in seconds to label the task.\n",
    "predictions.result - Array containing the results of the labeling or annotation task.\n",
    "result.id - Identifier for the specific annotation result for this task.\n",
    "result.from_name - Name of the tag used to label the region. See control tags.\n",
    "result.to_name\t- Name of the object tag that provided the region to be labeled. See object tags.\n",
    "result.type\t- Type of tag used to annotate the task.\n",
    "result.value - Tag-specific value that includes details of the result of labeling the task. The value - structure depends on the tag for the label. For more information, see Explore each tag.\n",
    "predictions.score - The overall score of the result, based on the probabilistic output, confidence level, or other.\n",
    "\n",
    "Following the target JSON format provided, write a minimal python code that transform input json to this format. \\\n",
    "Assume the input data will be read from the standard input (stdin), and the output generated will be directed to the standard output (stdout).'''\n",
    ")])\n",
    "\n",
    "env = SimpleCodeValidationEnvironment(df=df, code_fields={'code': 'payload'})\n",
    "\n",
    "agent = Agent(skills=skillset, environment=env)\n",
    "agent.learn(learning_iterations=1, num_feedbacks=1, batch_size=3)\n",
    "predictions = agent.run()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here is the code produced by Adala agent:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "import json\n",
      "import sys\n",
      "\n",
      "# read input from stdin\n",
      "input_json = sys.stdin.read()\n",
      "\n",
      "# parse input json\n",
      "input_data = json.loads(input_json)\n",
      "\n",
      "# initialize output json\n",
      "output_data = {}\n",
      "\n",
      "# add id to output json\n",
      "output_data[\"id\"] = input_data[\"outputs\"][0][\"entity_group\"]\n",
      "\n",
      "# add data to output json\n",
      "output_data[\"data\"] = input_data[\"inputs\"]\n",
      "\n",
      "# add project to output json\n",
      "output_data[\"project\"] = \"Label Studio\"\n",
      "\n",
      "# initialize predictions array\n",
      "predictions = []\n",
      "\n",
      "# loop through each output in input json\n",
      "for output in input_data[\"outputs\"]:\n",
      "    # initialize prediction dict\n",
      "    prediction = {}\n",
      "\n",
      "    # add id to prediction dict\n",
      "    prediction[\"id\"] = output[\"entity_group\"]\n",
      "\n",
      "    # add lead_time to prediction dict\n",
      "    prediction[\"lead_time\"] = 0\n",
      "\n",
      "    # initialize result array\n",
      "    result = []\n",
      "\n",
      "    # initialize result dict\n",
      "    result_dict = {}\n",
      "\n",
      "    # add from_name to result dict\n",
      "    result_dict[\"from_name\"] = output[\"entity_group\"]\n",
      "\n",
      "    # add to_name to result dict\n",
      "    result_dict[\"to_name\"] = output[\"entity_group\"]\n",
      "\n",
      "    # add type to result dict\n",
      "    result_dict[\"type\"] = \"entity\"\n",
      "\n",
      "    # initialize value dict\n",
      "    value_dict = {}\n",
      "\n",
      "    # add value to value dict\n",
      "    value_dict[\"text\"] = output[\"word\"]\n",
      "\n",
      "    # add start to value dict\n",
      "    value_dict[\"start\"] = output[\"start\"]\n",
      "\n",
      "    # add end to value dict\n",
      "    value_dict[\"end\"] = output[\"end\"]\n",
      "\n",
      "    # add value to result dict\n",
      "    result_dict[\"value\"] = value_dict\n",
      "\n",
      "    # add result dict to result array\n",
      "    result.append(result_dict)\n",
      "\n",
      "    # add result to prediction dict\n",
      "    prediction[\"result\"] = result\n",
      "\n",
      "    # add score to prediction dict\n",
      "    prediction[\"score\"] = output[\"score\"]\n",
      "\n",
      "    # add prediction to predictions array\n",
      "    predictions.append(prediction)\n",
      "\n",
      "# add predictions to output json\n",
      "output_data[\"predictions\"] = predictions\n",
      "\n",
      "# print output json to stdout\n",
      "print(json.dumps(output_data))\n"
     ]
    }
   ],
   "source": [
    "print(predictions.code[0])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "adala",
   "language": "python",
   "name": "adala"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
